# coding:utf-8
# Author : hiicy redldw
# Date : 2019/02/18
import random
def greedy_3(K, R, T, esipilon):
    """Run an epsilon-greedy K-armed bandit and return the total reward.

    On every trial an arm is chosen either uniformly at random (with
    probability ``esipilon``) or greedily as the arm with the highest
    running average reward; ties go to the lowest-indexed arm.

    :param K: number of arms; valid arm indices are ``0 .. K-1``
    :param R: rewards, read by indexing -- ``R[k]`` is the reward for arm ``k``
    :param T: number of trials (pulls) to perform
    :param esipilon: exploration probability, compared against
        ``random.random()``
    :return: sum of the rewards collected over the ``T`` trials
    """
    total_reward = 0
    Q = [0] * K      # running average reward of each arm
    count = [0] * K  # number of times each arm has been pulled
    for _ in range(T):
        if random.random() < esipilon:
            # Explore. randrange(K) yields 0..K-1; randint(0, K) would also
            # yield K, which is one past the last valid arm index.
            k = random.randrange(K)
        else:
            # Exploit: first arm holding the best average reward so far.
            k = Q.index(max(Q))
        v = R[k]
        total_reward += v
        # Fold the new reward into the arm's running average.
        Q[k] = (Q[k] * count[k] + v) / (count[k] + 1)
        count[k] += 1
    return total_reward














